#!/usr/bin/env python3
"""
Phase 6: Coefficient Surface Analysis

Empirical evidence that the demographic-macro relationship is a multi-dimensional
surface, not a single coefficient. Sample composition determines which region of
the surface is estimated.

Tests:
  1. Coefficient path: add countries by income rank, track Z₁
  2. Income-tercile stacking jackknife
  3. Bootstrap confidence bands on the coefficient path
  4. Temporal stability (pre-GFC vs post-GFC paths)
  5. Random-order permutation test (income-ordered vs random)

Outputs:
  - table7_coefficient_path.csv / .md
  - table8_income_tercile_jackknife.csv / .md
  - table9_temporal_stability.csv / .md
  - table10_permutation_test.csv / .md
  - figure1_coefficient_path.png
  - figure2_permutation_envelope.png
"""

import sys
import pandas as pd
import numpy as np
from pathlib import Path

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows")
sys.path.insert(0, str(PROJECT_DIR / "multilateral" / "src"))
from model import PanelGLS

FOLLOWUP_DIR = PROJECT_DIR / "multilateral" / "followup"
TABLE_DIR = PROJECT_DIR / "fragility" / "output" / "tables"
FIG_DIR = PROJECT_DIR / "fragility" / "output" / "figures"
TABLE_DIR.mkdir(parents=True, exist_ok=True)
FIG_DIR.mkdir(parents=True, exist_ok=True)

# ── Configuration ─────────────────────────────────────────────────────────
BASELINE_VARS = ['Z_1', 'Z_2', 'Z_3', 'fiscal_bal_gdp', 'kaopen',
                 'nfa_gdp_lag', 'log_rel_opw']
DV_SPECS = {
    'ca_gdp': 'Current Account / GDP',
    'gross_savings_gdp': 'Gross Savings / GDP',
    'gross_investment_gdp': 'Gross Investment / GDP',
    'govt_bond_10y': '10-Year Bond Yield',
}
N_BOOTSTRAP = 500
N_PERMUTATIONS = 1000
STEP_SIZE = 10  # add 10 countries at a time for the path
np.random.seed(42)

# ── Load and prepare panel ────────────────────────────────────────────────
print("=" * 70)
print("PHASE 6: COEFFICIENT SURFACE ANALYSIS")
print("=" * 70)

panel = pd.read_csv(FOLLOWUP_DIR / "data" / "processed" / "full_panel.csv")
panel = panel[(panel['year'] >= 1986) & (panel['year'] <= 2024)].copy()
print(f"Panel: {panel['iso3'].nunique()} countries, {len(panel):,} obs")

# Country-level mean GDP/pc for income ranking
mean_gdp = panel.groupby('iso3')['gdp_pc_ppp'].mean().dropna()
country_income_rank = mean_gdp.sort_values(ascending=False)  # richest first
print(f"Countries with GDP/pc data: {len(country_income_rank)}")

# Income terciles (use unsorted series for correct quantiles)
tercile_cuts = mean_gdp.quantile([1/3, 2/3])
low_cut = tercile_cuts.iloc[0]   # ~$6,764 — below this is Low
high_cut = tercile_cuts.iloc[1]  # ~$21,412 — above this is High

income_tercile = {}
for iso3, gdp in mean_gdp.items():
    if gdp > high_cut:
        income_tercile[iso3] = 'High'
    elif gdp > low_cut:
        income_tercile[iso3] = 'Middle'
    else:
        income_tercile[iso3] = 'Low'

panel['income_tercile'] = panel['iso3'].map(income_tercile)

# OECD classification
OECD = {
    'AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'CHL', 'COL', 'CRI', 'CZE', 'DEU',
    'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR', 'GRC', 'HUN', 'IRL', 'ISL',
    'ISR', 'ITA', 'JPN', 'KOR', 'LTU', 'LUX', 'LVA', 'MEX', 'NLD', 'NOR',
    'NZL', 'POL', 'PRT', 'SVK', 'SVN', 'SWE', 'TUR', 'USA',
}
panel['oecd'] = panel['iso3'].isin(OECD).astype(int)

for terc in ['High', 'Middle', 'Low']:
    n = sum(1 for v in income_tercile.values() if v == terc)
    print(f"  {terc} income: {n} countries")


def fit_z1(data, dep_var, indep_vars):
    """Fit PanelGLS and return Z₁ coefficient, SE, p-value, N, R²."""
    avail = [v for v in indep_vars if v in data.columns]
    comp = data.dropna(subset=[dep_var] + avail).copy()
    if comp['iso3'].nunique() < 5 or len(comp) < 30:
        return None

    gls = PanelGLS()
    gls.fit(comp[dep_var].values, comp[avail].values,
            comp['iso3'].values, comp['year'].values)

    z1_idx = avail.index('Z_1') if 'Z_1' in avail else 0
    return {
        'Z1_coef': gls.beta[z1_idx],
        'Z1_se': gls.se[z1_idx],
        'Z1_p': gls.pvalues[z1_idx],
        'N': gls.n_obs,
        'n_countries': comp['iso3'].nunique(),
        'R2': gls.r_squared,
    }


# ══════════════════════════════════════════════════════════════════════════
# TEST 1: COEFFICIENT PATH — add countries by income rank
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("TEST 1: COEFFICIENT PATH (richest → poorest)")
print("=" * 70)

path_rows = []
ranked_countries = list(country_income_rank.index)

for dv, dv_label in DV_SPECS.items():
    if dv not in panel.columns:
        print(f"  SKIP {dv}: not in panel")
        continue

    print(f"\n  {dv_label}:")

    # Build path in steps
    for step_end in range(STEP_SIZE, len(ranked_countries) + 1, STEP_SIZE):
        countries_in = set(ranked_countries[:step_end])
        sub = panel[panel['iso3'].isin(countries_in)].copy()

        result = fit_z1(sub, dv, BASELINE_VARS)
        if result is None:
            continue

        # Compute OECD share in this subsample
        oecd_share = sub[sub['iso3'].isin(OECD)]['iso3'].nunique() / sub['iso3'].nunique()
        # Mean income of countries in sample
        mean_inc = country_income_rank.loc[
            country_income_rank.index.isin(countries_in)].mean()

        row = {
            'dep_var': dv,
            'dep_var_label': dv_label,
            'n_countries_added': step_end,
            'mean_gdp_pc': mean_inc,
            'oecd_share': oecd_share,
            **result,
        }
        path_rows.append(row)

        sig = '***' if result['Z1_p'] < 0.001 else '**' if result['Z1_p'] < 0.01 \
            else '*' if result['Z1_p'] < 0.05 else ''
        if step_end % 40 == 0 or step_end == STEP_SIZE or step_end >= len(ranked_countries) - STEP_SIZE:
            print(f"    Top {step_end:3d}: Z₁={result['Z1_coef']:8.2f}{sig:3s}  "
                  f"(p={result['Z1_p']:.4f})  N={result['N']:5d}  "
                  f"OECD={oecd_share:.0%}  R²={result['R2']:.3f}")

    # Also add the full sample (all ranked countries)
    countries_in = set(ranked_countries)
    sub = panel[panel['iso3'].isin(countries_in)].copy()
    result = fit_z1(sub, dv, BASELINE_VARS)
    if result:
        oecd_share = sub[sub['iso3'].isin(OECD)]['iso3'].nunique() / sub['iso3'].nunique()
        row = {
            'dep_var': dv,
            'dep_var_label': dv_label,
            'n_countries_added': len(ranked_countries),
            'mean_gdp_pc': country_income_rank.mean(),
            'oecd_share': oecd_share,
            **result,
        }
        path_rows.append(row)

path_df = pd.DataFrame(path_rows)
path_df.to_csv(TABLE_DIR / "table7_coefficient_path.csv", index=False)

# Markdown summary
md = ["# Table 7: Coefficient Path — Adding Countries by Income Rank\n"]
md.append("Countries added from richest to poorest in steps of 10. "
          "Shows how Z₁ evolves as sample composition shifts.\n")

for dv, dv_label in DV_SPECS.items():
    sub = path_df[path_df['dep_var'] == dv]
    if len(sub) == 0:
        continue
    md.append(f"\n## {dv_label}\n")
    md.append("| Top N | Countries | OECD % | Z₁ | SE | p | R² |")
    md.append("|---:|---:|---:|---:|---:|---:|---:|")
    for _, r in sub.iterrows():
        sig = '***' if r['Z1_p'] < 0.001 else '**' if r['Z1_p'] < 0.01 \
            else '*' if r['Z1_p'] < 0.05 else ''
        md.append(f"| {r['n_countries_added']:.0f} | {r['n_countries']:.0f} | "
                  f"{r['oecd_share']:.0%} | {r['Z1_coef']:.2f}{sig} | "
                  f"{r['Z1_se']:.2f} | {r['Z1_p']:.4f} | {r['R2']:.3f} |")

md_text = "\n".join(md)
(TABLE_DIR / "table7_coefficient_path.md").write_text(md_text)
print("\n  Saved table7_coefficient_path")


# ══════════════════════════════════════════════════════════════════════════
# TEST 2: INCOME-TERCILE STACKING JACKKNIFE
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("TEST 2: INCOME-TERCILE JACKKNIFE")
print("=" * 70)

tercile_rows = []
tercile_labels = ['High', 'Middle', 'Low']

for dv, dv_label in DV_SPECS.items():
    if dv not in panel.columns:
        continue

    print(f"\n  {dv_label}:")

    # Full sample
    full_panel = panel[panel['income_tercile'].notna()].copy()
    result_full = fit_z1(full_panel, dv, BASELINE_VARS)
    if result_full is None:
        continue

    tercile_rows.append({
        'dep_var': dv, 'dep_var_label': dv_label,
        'sample': 'Full', **result_full,
    })
    sig = '***' if result_full['Z1_p'] < 0.001 else '**' if result_full['Z1_p'] < 0.01 \
        else '*' if result_full['Z1_p'] < 0.05 else ''
    print(f"    Full:         Z₁={result_full['Z1_coef']:8.2f}{sig}")

    # Each tercile only
    for terc in tercile_labels:
        sub = full_panel[full_panel['income_tercile'] == terc].copy()
        result = fit_z1(sub, dv, BASELINE_VARS)
        if result is None:
            continue
        tercile_rows.append({
            'dep_var': dv, 'dep_var_label': dv_label,
            'sample': f'{terc} only', **result,
        })
        sig = '***' if result['Z1_p'] < 0.001 else '**' if result['Z1_p'] < 0.01 \
            else '*' if result['Z1_p'] < 0.05 else ''
        print(f"    {terc:12s} only: Z₁={result['Z1_coef']:8.2f}{sig}")

    # Leave-one-tercile-out
    for terc in tercile_labels:
        sub = full_panel[full_panel['income_tercile'] != terc].copy()
        result = fit_z1(sub, dv, BASELINE_VARS)
        if result is None:
            continue
        tercile_rows.append({
            'dep_var': dv, 'dep_var_label': dv_label,
            'sample': f'Drop {terc}', **result,
        })
        sig = '***' if result['Z1_p'] < 0.001 else '**' if result['Z1_p'] < 0.01 \
            else '*' if result['Z1_p'] < 0.05 else ''
        print(f"    Drop {terc:7s}:  Z₁={result['Z1_coef']:8.2f}{sig}")

    # Nested stacking: OECD, +upper-middle, +all-middle, full
    oecd_only = full_panel[full_panel['oecd'] == 1].copy()
    oecd_high = full_panel[(full_panel['oecd'] == 1) |
                           (full_panel['income_tercile'] == 'High')].copy()
    oecd_high_mid = full_panel[(full_panel['oecd'] == 1) |
                               (full_panel['income_tercile'].isin(['High', 'Middle']))].copy()

    for label, sub in [('OECD only', oecd_only),
                       ('OECD + High', oecd_high),
                       ('OECD + High + Middle', oecd_high_mid),
                       ('Full panel', full_panel)]:
        result = fit_z1(sub, dv, BASELINE_VARS)
        if result is None:
            continue
        tercile_rows.append({
            'dep_var': dv, 'dep_var_label': dv_label,
            'sample': label, **result,
        })
        sig = '***' if result['Z1_p'] < 0.001 else '**' if result['Z1_p'] < 0.01 \
            else '*' if result['Z1_p'] < 0.05 else ''
        print(f"    {label:24s}: Z₁={result['Z1_coef']:8.2f}{sig}  "
              f"(N={result['N']}, {result['n_countries']} countries)")

tercile_df = pd.DataFrame(tercile_rows)
tercile_df.to_csv(TABLE_DIR / "table8_income_tercile_jackknife.csv", index=False)

# Markdown
md = ["# Table 8: Income-Tercile Jackknife\n"]
md.append("Tests whether Z₁ varies across income groups and whether dropping "
          "any tercile changes the result.\n")

for dv, dv_label in DV_SPECS.items():
    sub = tercile_df[tercile_df['dep_var'] == dv]
    if len(sub) == 0:
        continue
    md.append(f"\n## {dv_label}\n")
    md.append("| Sample | N | Countries | Z₁ | SE | p | R² |")
    md.append("|:---|---:|---:|---:|---:|---:|---:|")
    for _, r in sub.iterrows():
        sig = '***' if r['Z1_p'] < 0.001 else '**' if r['Z1_p'] < 0.01 \
            else '*' if r['Z1_p'] < 0.05 else ''
        md.append(f"| {r['sample']} | {r['N']:.0f} | {r['n_countries']:.0f} | "
                  f"{r['Z1_coef']:.2f}{sig} | {r['Z1_se']:.2f} | "
                  f"{r['Z1_p']:.4f} | {r['R2']:.3f} |")

md_text = "\n".join(md)
(TABLE_DIR / "table8_income_tercile_jackknife.md").write_text(md_text)
print("\n  Saved table8_income_tercile_jackknife")


# ══════════════════════════════════════════════════════════════════════════
# TEST 3: BOOTSTRAP CONFIDENCE BANDS ON THE COEFFICIENT PATH
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print(f"TEST 3: BOOTSTRAP CONFIDENCE BANDS ({N_BOOTSTRAP} draws)")
print("=" * 70)

# Focus on CA/GDP for the bootstrap (most important DV)
TARGET_DV = 'ca_gdp'
boot_rows = []

avail_vars = [v for v in BASELINE_VARS if v in panel.columns]
ca_panel = panel.dropna(subset=[TARGET_DV] + avail_vars).copy()
ca_countries_ranked = [c for c in ranked_countries if c in ca_panel['iso3'].unique()]

# Steps for bootstrap (coarser to keep runtime manageable)
boot_steps = list(range(20, len(ca_countries_ranked) + 1, 20))
if boot_steps[-1] != len(ca_countries_ranked):
    boot_steps.append(len(ca_countries_ranked))

for step_end in boot_steps:
    countries_in = set(ca_countries_ranked[:step_end])
    sub = ca_panel[ca_panel['iso3'].isin(countries_in)].copy()

    # Point estimate
    point = fit_z1(sub, TARGET_DV, BASELINE_VARS)
    if point is None:
        continue

    # Bootstrap: resample countries with replacement
    unique_countries = sub['iso3'].unique()
    boot_z1s = []

    for b in range(N_BOOTSTRAP):
        # Cluster bootstrap at country level
        boot_countries = np.random.choice(unique_countries, size=len(unique_countries),
                                          replace=True)
        # Build bootstrap sample
        boot_dfs = []
        for i, c in enumerate(boot_countries):
            cdata = sub[sub['iso3'] == c].copy()
            # Rename to avoid duplicate entity IDs
            cdata['iso3_boot'] = f"{c}_{i}"
            boot_dfs.append(cdata)

        boot_sample = pd.concat(boot_dfs, ignore_index=True)
        if boot_sample['iso3_boot'].nunique() < 5:
            continue

        try:
            gls = PanelGLS()
            gls.fit(boot_sample[TARGET_DV].values,
                    boot_sample[avail_vars].values,
                    boot_sample['iso3_boot'].values,
                    boot_sample['year'].values)
            z1_idx = avail_vars.index('Z_1')
            boot_z1s.append(gls.beta[z1_idx])
        except Exception:
            continue

    if len(boot_z1s) < 50:
        print(f"  Step {step_end}: insufficient bootstrap draws ({len(boot_z1s)})")
        continue

    boot_arr = np.array(boot_z1s)
    ci_lo = np.percentile(boot_arr, 2.5)
    ci_hi = np.percentile(boot_arr, 97.5)

    boot_rows.append({
        'n_countries_added': step_end,
        'n_countries_est': point['n_countries'],
        'Z1_point': point['Z1_coef'],
        'Z1_boot_mean': boot_arr.mean(),
        'Z1_boot_se': boot_arr.std(),
        'Z1_ci_lo': ci_lo,
        'Z1_ci_hi': ci_hi,
        'Z1_p': point['Z1_p'],
        'N': point['N'],
        'sign_change': (ci_lo < 0 < ci_hi),
    })

    sig = '***' if point['Z1_p'] < 0.001 else '**' if point['Z1_p'] < 0.01 \
        else '*' if point['Z1_p'] < 0.05 else ''
    sc = ' SIGN-CHANGE' if ci_lo < 0 < ci_hi else ''
    print(f"  Top {step_end:3d}: Z₁={point['Z1_coef']:8.2f}{sig:3s}  "
          f"95% CI=[{ci_lo:.1f}, {ci_hi:.1f}]{sc}")

boot_df = pd.DataFrame(boot_rows)
boot_df.to_csv(TABLE_DIR / "table7b_bootstrap_bands.csv", index=False)

# Markdown
md = ["# Table 7b: Bootstrap Confidence Bands on the Coefficient Path\n"]
md.append(f"CA/GDP. {N_BOOTSTRAP} cluster-bootstrap draws at each step. "
          "Countries added richest to poorest.\n")
md.append("| Top N | Countries | Z₁ | 95% CI | p | Sign change? |")
md.append("|---:|---:|---:|:---|---:|:---|")
for _, r in boot_df.iterrows():
    sig = '***' if r['Z1_p'] < 0.001 else '**' if r['Z1_p'] < 0.01 \
        else '*' if r['Z1_p'] < 0.05 else ''
    sc = "YES" if r['sign_change'] else ""
    md.append(f"| {r['n_countries_added']:.0f} | {r['n_countries_est']:.0f} | "
              f"{r['Z1_point']:.2f}{sig} | [{r['Z1_ci_lo']:.1f}, {r['Z1_ci_hi']:.1f}] | "
              f"{r['Z1_p']:.4f} | {sc} |")
md_text = "\n".join(md)
(TABLE_DIR / "table7b_bootstrap_bands.md").write_text(md_text)
print("\n  Saved table7b_bootstrap_bands")


# ══════════════════════════════════════════════════════════════════════════
# TEST 4: TEMPORAL STABILITY — pre-GFC vs post-GFC coefficient paths
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("TEST 4: TEMPORAL STABILITY (pre-GFC vs post-GFC)")
print("=" * 70)

temporal_rows = []
periods = {
    'Pre-GFC (1986-2007)': (1986, 2007),
    'Post-GFC (2008-2024)': (2008, 2024),
    'Full (1986-2024)': (1986, 2024),
}

for period_label, (y_start, y_end) in periods.items():
    period_panel = panel[(panel['year'] >= y_start) & (panel['year'] <= y_end)].copy()
    print(f"\n  {period_label}:")

    for dv, dv_label in DV_SPECS.items():
        if dv not in period_panel.columns:
            continue

        # Income-ordered path (coarser steps for speed)
        period_countries = [c for c in ranked_countries
                           if c in period_panel.dropna(subset=[dv])['iso3'].unique()]

        for step_end in [20, 40, 60, 80, 100, len(period_countries)]:
            if step_end > len(period_countries):
                step_end = len(period_countries)
            countries_in = set(period_countries[:step_end])
            sub = period_panel[period_panel['iso3'].isin(countries_in)].copy()

            result = fit_z1(sub, dv, BASELINE_VARS)
            if result is None:
                continue

            temporal_rows.append({
                'period': period_label,
                'dep_var': dv,
                'dep_var_label': dv_label,
                'n_countries_added': step_end,
                **result,
            })

            if dv == TARGET_DV:
                sig = '***' if result['Z1_p'] < 0.001 else '**' if result['Z1_p'] < 0.01 \
                    else '*' if result['Z1_p'] < 0.05 else ''
                print(f"    CA Top {step_end:3d}: Z₁={result['Z1_coef']:8.2f}{sig}")

temporal_df = pd.DataFrame(temporal_rows)
temporal_df.to_csv(TABLE_DIR / "table9_temporal_stability.csv", index=False)

# Markdown — focus on CA path comparison
md = ["# Table 9: Temporal Stability of the Coefficient Path\n"]
md.append("Does the shape of the income-ordered coefficient path change "
          "between pre-GFC and post-GFC periods?\n")

for dv, dv_label in DV_SPECS.items():
    sub = temporal_df[temporal_df['dep_var'] == dv]
    if len(sub) == 0:
        continue
    md.append(f"\n## {dv_label}\n")
    md.append("| Period | Top N | Countries | Z₁ | SE | p | R² |")
    md.append("|:---|---:|---:|---:|---:|---:|---:|")
    for _, r in sub.iterrows():
        sig = '***' if r['Z1_p'] < 0.001 else '**' if r['Z1_p'] < 0.01 \
            else '*' if r['Z1_p'] < 0.05 else ''
        md.append(f"| {r['period']} | {r['n_countries_added']:.0f} | "
                  f"{r['n_countries']:.0f} | {r['Z1_coef']:.2f}{sig} | "
                  f"{r['Z1_se']:.2f} | {r['Z1_p']:.4f} | {r['R2']:.3f} |")

md_text = "\n".join(md)
(TABLE_DIR / "table9_temporal_stability.md").write_text(md_text)
print("\n  Saved table9_temporal_stability")


# ══════════════════════════════════════════════════════════════════════════
# TEST 5: RANDOM-ORDER PERMUTATION TEST
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print(f"TEST 5: RANDOM-ORDER PERMUTATION ({N_PERMUTATIONS} permutations)")
print("=" * 70)

# Focus on CA/GDP
avail_vars = [v for v in BASELINE_VARS if v in panel.columns]
ca_panel = panel.dropna(subset=[TARGET_DV] + avail_vars).copy()
ca_countries_all = list(ca_panel['iso3'].unique())

# Use same steps as path analysis
perm_steps = list(range(20, len(ca_countries_all) + 1, 20))
if perm_steps[-1] != len(ca_countries_all):
    perm_steps.append(len(ca_countries_all))

# Income-ordered path (reference)
income_path = {}
ca_countries_income_ranked = [c for c in ranked_countries
                              if c in ca_panel['iso3'].unique()]
for step_end in perm_steps:
    if step_end > len(ca_countries_income_ranked):
        break
    countries_in = set(ca_countries_income_ranked[:step_end])
    sub = ca_panel[ca_panel['iso3'].isin(countries_in)].copy()
    result = fit_z1(sub, TARGET_DV, BASELINE_VARS)
    if result:
        income_path[step_end] = result['Z1_coef']

# Random permutations
print(f"  Running {N_PERMUTATIONS} random-order permutations...")
perm_z1_by_step = {s: [] for s in perm_steps}

for perm in range(N_PERMUTATIONS):
    shuffled = list(ca_countries_all)
    np.random.shuffle(shuffled)

    for step_end in perm_steps:
        if step_end > len(shuffled):
            break
        countries_in = set(shuffled[:step_end])
        sub = ca_panel[ca_panel['iso3'].isin(countries_in)].copy()

        result = fit_z1(sub, TARGET_DV, BASELINE_VARS)
        if result:
            perm_z1_by_step[step_end].append(result['Z1_coef'])

    if (perm + 1) % 100 == 0:
        print(f"    Permutation {perm + 1}/{N_PERMUTATIONS}")

# Compile results
perm_rows = []
for step_end in perm_steps:
    perm_vals = perm_z1_by_step[step_end]
    if len(perm_vals) < 10:
        continue

    perm_arr = np.array(perm_vals)
    income_z1 = income_path.get(step_end, np.nan)

    # Where does the income-ordered value fall in the permutation distribution?
    if not np.isnan(income_z1):
        perm_rank = np.mean(perm_arr <= income_z1)  # percentile
    else:
        perm_rank = np.nan

    perm_rows.append({
        'step': step_end,
        'income_ordered_Z1': income_z1,
        'perm_mean': perm_arr.mean(),
        'perm_sd': perm_arr.std(),
        'perm_p05': np.percentile(perm_arr, 5),
        'perm_p25': np.percentile(perm_arr, 25),
        'perm_p50': np.percentile(perm_arr, 50),
        'perm_p75': np.percentile(perm_arr, 75),
        'perm_p95': np.percentile(perm_arr, 95),
        'income_percentile': perm_rank,
        'outside_90': income_z1 < np.percentile(perm_arr, 5) or
                      income_z1 > np.percentile(perm_arr, 95)
                      if not np.isnan(income_z1) else None,
    })

perm_df = pd.DataFrame(perm_rows)
perm_df.to_csv(TABLE_DIR / "table10_permutation_test.csv", index=False)

# Markdown
md = ["# Table 10: Random-Order Permutation Test\n"]
md.append(f"CA/GDP. {N_PERMUTATIONS} random country orderings compared to "
          "income-ranked ordering.\n")
md.append("If income ordering produces Z₁ values outside the 90% permutation "
          "envelope, the surface has income structure.\n")
md.append("| Step | Income-ordered Z₁ | Perm median | Perm 90% range | "
          "Income %-ile | Outside 90%? |")
md.append("|---:|---:|---:|:---|---:|:---|")
for _, r in perm_df.iterrows():
    outside = "YES" if r['outside_90'] else ""
    md.append(f"| {r['step']:.0f} | {r['income_ordered_Z1']:.2f} | "
              f"{r['perm_p50']:.2f} | [{r['perm_p05']:.1f}, {r['perm_p95']:.1f}] | "
              f"{r['income_percentile']:.0%} | {outside} |")

md_text = "\n".join(md)
(TABLE_DIR / "table10_permutation_test.md").write_text(md_text)
print("\n  Saved table10_permutation_test")


# ══════════════════════════════════════════════════════════════════════════
# FIGURES
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("GENERATING FIGURES")
print("=" * 70)

try:
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    # ── Figure 1: Coefficient Path with Bootstrap CIs ─────────────────
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot the path from table7 for CA/GDP
    ca_path = path_df[path_df['dep_var'] == 'ca_gdp'].sort_values('n_countries_added')

    ax.plot(ca_path['n_countries_added'], ca_path['Z1_coef'],
            'b-o', linewidth=2, markersize=4, label='Z₁ coefficient', zorder=3)

    # Add bootstrap CIs if available
    if len(boot_df) > 0:
        ax.fill_between(boot_df['n_countries_added'],
                        boot_df['Z1_ci_lo'], boot_df['Z1_ci_hi'],
                        alpha=0.2, color='blue', label='95% bootstrap CI')

    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.set_xlabel('Number of countries (richest → poorest)', fontsize=12)
    ax.set_ylabel('Z₁ coefficient on CA/GDP', fontsize=12)
    ax.set_title('Coefficient Path: How Z₁ Evolves as Sample Broadens', fontsize=14)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # Add OECD share annotation on top axis
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    oecd_ticks = [20, 60, 100, 140, ca_path['n_countries_added'].max()]
    oecd_labels = []
    for n in oecd_ticks:
        row = ca_path.iloc[(ca_path['n_countries_added'] - n).abs().argsort()[:1]]
        if len(row) > 0:
            oecd_labels.append(f"{row['oecd_share'].values[0]:.0%}")
        else:
            oecd_labels.append("")
    ax2.set_xticks(oecd_ticks)
    ax2.set_xticklabels(oecd_labels)
    ax2.set_xlabel('OECD share of countries', fontsize=10)

    plt.tight_layout()
    fig.savefig(FIG_DIR / "figure1_coefficient_path.png", dpi=150, bbox_inches='tight')
    plt.close()
    print("  Saved figure1_coefficient_path.png")

    # ── Figure 2: Permutation Envelope ────────────────────────────────
    fig, ax = plt.subplots(figsize=(10, 6))

    # Random envelope
    ax.fill_between(perm_df['step'], perm_df['perm_p05'], perm_df['perm_p95'],
                    alpha=0.2, color='gray', label='90% random-order envelope')
    ax.fill_between(perm_df['step'], perm_df['perm_p25'], perm_df['perm_p75'],
                    alpha=0.3, color='gray', label='50% random-order envelope')
    ax.plot(perm_df['step'], perm_df['perm_p50'],
            'k--', alpha=0.5, label='Random-order median')

    # Income-ordered path
    ax.plot(perm_df['step'], perm_df['income_ordered_Z1'],
            'r-o', linewidth=2.5, markersize=5,
            label='Income-ordered path', zorder=4)

    ax.axhline(y=0, color='gray', linestyle=':', alpha=0.5)
    ax.set_xlabel('Number of countries in sample', fontsize=12)
    ax.set_ylabel('Z₁ coefficient on CA/GDP', fontsize=12)
    ax.set_title('Income-Ordered Path vs Random-Order Envelope', fontsize=14)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    fig.savefig(FIG_DIR / "figure2_permutation_envelope.png", dpi=150, bbox_inches='tight')
    plt.close()
    print("  Saved figure2_permutation_envelope.png")

    # ── Figure 3: Multi-DV Coefficient Paths ──────────────────────────
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    axes = axes.flatten()

    for idx, (dv, dv_label) in enumerate(DV_SPECS.items()):
        ax = axes[idx]
        sub = path_df[path_df['dep_var'] == dv].sort_values('n_countries_added')
        if len(sub) == 0:
            ax.set_title(f'{dv_label}\n(no data)')
            continue

        ax.plot(sub['n_countries_added'], sub['Z1_coef'],
                'b-o', linewidth=2, markersize=3)
        ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
        ax.set_xlabel('Countries (richest → poorest)')
        ax.set_ylabel('Z₁ coefficient')
        ax.set_title(dv_label, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)

        # Shade significance
        sig_mask = sub['Z1_p'] < 0.05
        if sig_mask.any():
            ax.fill_between(sub['n_countries_added'], sub['Z1_coef'],
                           0, where=sig_mask, alpha=0.1, color='blue')

    plt.suptitle('Coefficient Paths Across Dependent Variables', fontsize=14, y=1.02)
    plt.tight_layout()
    fig.savefig(FIG_DIR / "figure3_multi_dv_paths.png", dpi=150, bbox_inches='tight')
    plt.close()
    print("  Saved figure3_multi_dv_paths.png")

    # ── Figure 4: Temporal Stability ──────────────────────────────────
    fig, ax = plt.subplots(figsize=(10, 6))

    for period_label, color, marker in [
        ('Pre-GFC (1986-2007)', 'blue', 's'),
        ('Post-GFC (2008-2024)', 'red', '^'),
        ('Full (1986-2024)', 'black', 'o'),
    ]:
        sub = temporal_df[(temporal_df['dep_var'] == 'ca_gdp') &
                          (temporal_df['period'] == period_label)].sort_values('n_countries_added')
        if len(sub) > 0:
            ax.plot(sub['n_countries_added'], sub['Z1_coef'],
                    f'-{marker}', color=color, linewidth=2, markersize=5,
                    label=period_label)

    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.set_xlabel('Number of countries (richest → poorest)', fontsize=12)
    ax.set_ylabel('Z₁ coefficient on CA/GDP', fontsize=12)
    ax.set_title('Temporal Stability: Pre-GFC vs Post-GFC Coefficient Paths', fontsize=14)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    fig.savefig(FIG_DIR / "figure4_temporal_stability.png", dpi=150, bbox_inches='tight')
    plt.close()
    print("  Saved figure4_temporal_stability.png")

except ImportError:
    print("  matplotlib not available — skipping figures")


# ══════════════════════════════════════════════════════════════════════════
# SUMMARY
# ══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("SUMMARY")
print("=" * 70)

# Path range for CA/GDP
ca_path = path_df[path_df['dep_var'] == 'ca_gdp']
if len(ca_path) > 0:
    z1_min = ca_path['Z1_coef'].min()
    z1_max = ca_path['Z1_coef'].max()
    print(f"  CA/GDP Z₁ range across income-ordered path: [{z1_min:.1f}, {z1_max:.1f}]")
    print(f"  Spread: {z1_max - z1_min:.1f} percentage points")

# Tercile results
if len(tercile_df) > 0:
    ca_terciles = tercile_df[(tercile_df['dep_var'] == 'ca_gdp') &
                             (tercile_df['sample'].str.contains('only'))]
    if len(ca_terciles) > 0:
        print(f"\n  Income tercile Z₁ on CA/GDP:")
        for _, r in ca_terciles.iterrows():
            sig = '***' if r['Z1_p'] < 0.001 else '**' if r['Z1_p'] < 0.01 \
                else '*' if r['Z1_p'] < 0.05 else ''
            print(f"    {r['sample']:15s}: {r['Z1_coef']:8.2f}{sig}")

# Permutation test
if len(perm_df) > 0:
    outside = perm_df['outside_90'].sum()
    total = len(perm_df)
    print(f"\n  Permutation test: income-ordered path outside 90% envelope "
          f"at {outside}/{total} steps")

print(f"\nAll outputs saved to:")
print(f"  Tables: {TABLE_DIR}")
print(f"  Figures: {FIG_DIR}")
print("\nPhase 6 complete.")
